# Set Working Directory before starting

# Immcantation
suppressPackageStartupMessages(library(alakazam))
suppressPackageStartupMessages(library(shazam))
suppressPackageStartupMessages(library(tigger))
suppressPackageStartupMessages(library(dowser))

suppressPackageStartupMessages(library(glmGamPoi))
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(dittoSeq))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(kableExtra))
suppressPackageStartupMessages(library(devtools))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(RColorBrewer))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(cowplot))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(impactSingleCellToolkit))

source("resources/functions_workshop.R")

1. Load Processed Objects

load("objects/processed_objects.RData")

2. Subset Disease Objects

a. subset B and T cells

# T cell CSF object
tcell_obj_csf <- tcell_obj[,names(tcell_obj$status[tcell_obj$status %in% "Disease"])]
tcell_obj_csf <- tcell_obj_csf[,names(tcell_obj_csf$compartment[tcell_obj_csf$compartment %in% "CSF"])]
tcell_obj_csf$expansion_category[tcell_obj_csf$expansion_category %in% c("High","Moderate")] <- "Expanded"
tcell_obj_csf$expansion.subtype <- paste(tcell_obj_csf$expansion_category,tcell_obj_csf$sub.celltype.annot)


# B cell object
bcell_obj_csfpb <- bcell_obj
bcell_obj_csfpb$expansion_category[bcell_obj_csfpb$expansion_category %in% c("High","Moderate")] <- "Expanded"
bcell_obj_csfpb$expansion.subtype <- paste(bcell_obj_csfpb$expansion_category,bcell_obj_csfpb$sub.celltype.annot)

b. B cell gene usage (heatmap)

  • With RNA-Seq alone, it is hard to separate out clonotypes

V genes

  • There is some indication that cells with the same V genes group together, but the patterns are hard to distinguish
genes_bcells             <- row.names(bcell_obj_csfpb)
vgenes_bcells            <- sort(genes_bcells[grep("IGHV",genes_bcells)])

h1 <- dittoHeatmap(bcell_obj_csfpb, genes = vgenes_bcells,
        annot.by = c("sub.celltype.annot","compartment","status"),
        scaled.to.max = TRUE,
        show_colnames = FALSE,
        show_rownames = TRUE,
        cluster_cols = TRUE,
        main = "V gene usage B cells")

Other Ig genes

  • Elevated IgM expression among B cells
  • There is some level of grouping of CSF cells with IgA1 and IgG
genes_bcells             <- row.names(bcell_obj_csfpb)
other_ig_genes_bcells    <- sort(genes_bcells[grep("IGHA|IGHG|IGHM|IGHD",genes_bcells)])

h2 <- dittoHeatmap(bcell_obj_csfpb, genes = other_ig_genes_bcells,
        annot.by = c("sub.celltype.annot","compartment","status"),
        scaled.to.max = TRUE,
        show_colnames = FALSE,
        show_rownames = TRUE,
        cluster_cols = FALSE,
        main = "Ig gene usage B cells")

3. Differential Gene Expression

load("objects/csf_vs_pb_DGE.RData")

a. CSF vs. PB

TOP_N <- 100

# Tcells
Idents(tcell_obj) <- tcell_obj$compartment
levels(tcell_obj) <- c("CSF","PB")

dge_compartment_tcells_all <- FindAllMarkers(tcell_obj,logfc.threshold = 0.0,test.use = "wilcox")
dge_compartment_tcells <- dge_compartment_tcells_all[dge_compartment_tcells_all$p_val_adj < 0.05,]
top_genes_tcells       <- dge_compartment_tcells %>% group_by(cluster) %>% top_n(TOP_N, avg_log2FC)

# B cells
Idents(bcell_obj) <- bcell_obj$compartment
levels(bcell_obj) <- c("CSF","PB")

dge_compartment_bcells_all <- FindAllMarkers(bcell_obj,logfc.threshold = 0.0,test.use = "wilcox")
dge_compartment_bcells <- dge_compartment_bcells_all[dge_compartment_bcells_all$p_val_adj < 0.05,]
top_genes_bcells       <- dge_compartment_bcells %>% group_by(cluster) %>% top_n(TOP_N, avg_log2FC)

Heatmaps - T cells

p1 <- dittoHeatmap(tcell_obj, genes = top_genes_tcells$gene,
        annot.by = c("compartment","status", "sub.celltype.annot"),
        scaled.to.max = TRUE,
        show_colnames = FALSE,
        show_rownames = FALSE,
        cluster_cols = FALSE,
        main = "CSF vs. PB T cells")

# CSF vs PB
p1.2 <- DoHeatmap(tcell_obj,features = top_genes_tcells$gene)
## Warning in DoHeatmap(tcell_obj, features = top_genes_tcells$gene): The
## following features were omitted as they were not found in the scale.data slot
## for the SCT assay: KRT72, SH3BGRL2, AP000547.3, PADI4, CLEC11A, ITGA2B, KRT2,
## CASC1, MEST, AL034550.2, DBN1, SYNGR1, TREML1, LINC02295, PROK2, OSBPL5, REG4,
## CES1, AL109767.1, KRT73-AS1, SLC22A17, MZF1-AS1, PCSK5, SNHG19, AC004832.6,
## MMP28, ZBTB10, CD248, AF213884.3, C6orf99, EFCAB2, AL121944.1, AK5, LRRN3, NOG,
## COL18A1, KIF9, MXD4, CIC, FAM129B, DOPEY2, PERP, ALCAM, SLC25A1, MKNK2,
## ARHGAP35, ALDOC, RNPEPL1, DHCR7, TMEM11, CALHM2, SNAI3, TLNRD1, CXCR6, SS18L2,
## ERG28, SKIL, TNFSF14, CCR5, PDCD1, LTC4S, HMGCR, BLOC1S1, FASN, DHCR24, DYNLT1,
## SCD
p1

p1.2

Heatmaps - B cells

p2 <- dittoHeatmap(bcell_obj, genes = top_genes_bcells$gene,
        annot.by = c("compartment","status", "sub.celltype.annot"),
        scaled.to.max = TRUE,
        show_colnames = FALSE,
        show_rownames = FALSE,
        cluster_cols = FALSE,
        main = "CSF vs. PB B cells")

# CSF vs PB
p2.2 <- DoHeatmap(bcell_obj,features = top_genes_bcells$gene)
## Warning in DoHeatmap(bcell_obj, features = top_genes_bcells$gene): The
## following features were omitted as they were not found in the scale.data slot
## for the SCT assay: NPM1, GPSM3, BTF3, RPL31, PFDN5, RPL4, CCNI, CIRBP, PCBP2,
## NACA, PPDPF, NOP53, RPL22, EEF1D, RACK1, RPL10A, FAU, RPL23A, RPL35A, RPS9,
## EEF2, RPL18, RPL5, RPL9, RPS5, RPLP2, RPL8, RPSA, S100A13, RGS16, ARID3A,
## EEF1B2, RPS25, RPL36, UBA52, RPL6, RPS4X, RPL24, RPL3, RPL14, RPS24, OSTN-AS1,
## RPS16, ACY3, RPL37A, RPL21, RPS15, RPS6, RPS7, GCAT, EPB41L3, TYMS, AC015849.1,
## ATP8B4, ZMYND19, GPR34, KHDC1, PTPN13, CAMK1, RAPH1, DAPL1, MIXL1, WLS, RPL15,
## RPL29, MAFF, TGM2, ENDOG, KCNK12, RPL12, RPS21, RPL7A, IGHG4, AXL, FCRL4
p2

p2.2

Volcano Plots

v1 <- plotDGEvolcano(dge_compartment_bcells_all,YLIM = c(0,50),title = "B cell DGE CSF vs. PB",cluster_filt = "CSF",
                    p.adj.thresh = 0.05,show_top_genes = 20)
v2 <- plotDGEvolcano(dge_compartment_tcells_all,YLIM = c(0,250),title = "T cell DGE CSF vs. PB",cluster_filt = "CSF",
                    p.adj.thresh = 0.05,show_top_genes = 20)

plot_grid(v1,v2,nrow = 1)

b. T cells Expanded vs. Non-expanded (CSF)

TOP_N <- 12
Idents(tcell_obj_csf) <- tcell_obj_csf$expansion.subtype
levels(tcell_obj_csf) <- c("Expanded CD8 T cells","Non-expanded CD8 T cells","Expanded CD4 T cells","Non-expanded CD4 T cells")

dge_tcell_csf   <- FindAllMarkers(tcell_obj_csf,logfc.threshold = 0.1,test.use = "wilcox")
## Calculating cluster Expanded CD8 T cells
## For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the presto package
## --------------------------------------------
## install.packages('devtools')
## devtools::install_github('immunogenomics/presto')
## --------------------------------------------
## After installation of presto, Seurat will automatically use the more 
## efficient implementation (no further action necessary).
## This message will be shown once per session
## Calculating cluster Non-expanded CD8 T cells
## Calculating cluster Expanded CD4 T cells
## Calculating cluster Non-expanded CD4 T cells
dge_tcell_csf   <- dge_tcell_csf[dge_tcell_csf$p_val_adj < 0.05,]
top_genes_tcells_csf <- dge_tcell_csf %>% group_by(cluster) %>% top_n(TOP_N, avg_log2FC)

-plot T cells

DotPlot(object = tcell_obj_csf,features = unique(top_genes_tcells_csf$gene)) + theme(axis.text.x = element_text(angle = 90, hjust=1)) + ggtitle("T cells CSF High vs. Non-Expanded") + coord_flip()

c. B cells Expanded vs. Non-expanded (CSF and PB)

TOP_N <- 10
Idents(bcell_obj_csfpb) <- bcell_obj_csfpb$expansion.subtype
complete_order          <- c("Expanded Plasma cells","Non-expanded Plasma cells",
                             "Expanded Memory B-cells","Non-expanded Memory B-cells",
                             "Expanded naive B-cells","Non-expanded naive B-cells")
complete_order_sub <- complete_order[complete_order %in% unique(bcell_obj_csfpb$expansion.subtype)]
levels(bcell_obj_csfpb) <- complete_order_sub

dge_bcell_csfpb   <- FindAllMarkers(bcell_obj_csfpb,logfc.threshold = 0.1,test.use = "wilcox")
## Calculating cluster Expanded Memory B-cells
## Calculating cluster Non-expanded Memory B-cells
## Calculating cluster Expanded naive B-cells
## Calculating cluster Non-expanded naive B-cells
dge_bcell_csfpb   <- dge_bcell_csfpb[dge_bcell_csfpb$p_val_adj < 0.05,]
top_genes_bcells_csfpb <- dge_bcell_csfpb %>% group_by(cluster) %>% top_n(TOP_N, avg_log2FC)

-plot B cells

DotPlot(object = bcell_obj_csfpb,features = unique(top_genes_bcells_csfpb$gene)) + theme(axis.text.x = element_text(angle = 90, hjust=1)) + ggtitle("B cells CSF High vs. Non-Expanded") + coord_flip()

4. Pathway analysis

suppressPackageStartupMessages(library(ReactomePA))
suppressPackageStartupMessages(library(org.Hs.eg.db))
suppressPackageStartupMessages(library(igraph))
suppressPackageStartupMessages(library(enrichplot))
suppressPackageStartupMessages(library(ggnewscale))

suppressPackageStartupMessages(library(DOSE))
suppressPackageStartupMessages(library(clusterProfiler))
suppressPackageStartupMessages(library(ggupset))

a. B cells Memory

# Most of the expression should be in the target cluster
# - pct.1 = proportion of expression of the gene within the target cluster group
# - pct.2 = proportion of expression of the gene within everything outside the target cluster
target_genes <- dge_bcell_csfpb[dge_bcell_csfpb$cluster %in% c("Expanded Memory B-cells","Non-expanded Memory B-cells"),]

# Convert gene names to Entrez IDs
de_genes <- mapIds(org.Hs.eg.db, target_genes$gene, 'ENTREZID', 'SYMBOL')
## 'select()' returned 1:1 mapping between keys and columns
de_genes <- de_genes[!is.na(de_genes)]
de <- as.character(de_genes)

# Create FC and P-adj-value vectors for enrihed genes
de_fc_df <- target_genes[target_genes$gene %in% names(de_genes),]
entrez_col <- c()
for (g in names(de_genes)) {
  egene <- as.character(de_genes[g])
  entrez_col <- c(entrez_col,egene)
}
de_fc_df$entrez_id <- entrez_col

de_fc_list        <- de_fc_df$avg_log2FC
names(de_fc_list) <- de_fc_df$entrez_id

# P-value vec
de_pval_list        <- de_fc_df$p_val_adj
names(de_pval_list) <- de_fc_df$entrez_id

# Pathway Enrichment
bcell_pathways       <- enrichPathway(gene=de,pvalueCutoff=0.05, readable=T)
bcell_pathways_df    <- as.data.frame(bcell_pathways)

# Top enriched pathways and the gene count within each pathway
barplot(bcell_pathways, showCategory = 14)

Network of genes shared between pathways

bcell_pathways_net <- pairwise_termsim(bcell_pathways,showCategory = 14)
emapplot(bcell_pathways_net, showCategory = 14,
         repel = T,cex_label_group = 0.5,layout = "star")

Network of Pathways and genes (DisGeNET database)

edo <- enrichDGN(as.list(de))
de_fc_list <- sort(de_fc_list,decreasing = T)

## convert gene ID to Symbol
edox <- setReadable(edo, 'org.Hs.eg.db', 'ENTREZID')

## Network categorySize can be scaled by 'pvalue' or 'geneNum' 
p1_bcell <- cnetplot(edox, categorySize="pvalue", foldChange=de_fc_list)
## Warning in cnetplot.enrichResult(x, ...): Use 'color.params = list(foldChange = your_value)' instead of 'foldChange'.
##  The foldChange parameter will be removed in the next version.
## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.
## Heatmap
p2_bcell <- heatplot(edox, foldChange=de_fc_list, showCategory=14)

plot_grid(p1_bcell,p2_bcell,ncol = 2)

b. T cells CD8

# - pct.1 = proportion of expression of the gene within the target cluster group
# - pct.2 = proportion of expression of the gene within everything outside the target cluster
target_genes <- dge_tcell_csf[dge_tcell_csf$cluster %in% c("Expanded CD8 T cells","Non-expanded CD8 T cells"),]

# Convert gene names to Entrez IDs
de_genes <- mapIds(org.Hs.eg.db, target_genes$gene, 'ENTREZID', 'SYMBOL')
## 'select()' returned 1:1 mapping between keys and columns
de_genes <- de_genes[!is.na(de_genes)]
de <- as.character(de_genes)

# Create FC and P-adj-value vectors for enrihed genes
de_fc_df <- target_genes[target_genes$gene %in% names(de_genes),]
entrez_col <- c()
for (g in names(de_genes)) {
  egene <- as.character(de_genes[g])
  entrez_col <- c(entrez_col,egene)
}
de_fc_df$entrez_id <- entrez_col

de_fc_list        <- de_fc_df$avg_log2FC
names(de_fc_list) <- de_fc_df$entrez_id

# P-value vec
de_pval_list        <- de_fc_df$p_val_adj
names(de_pval_list) <- de_fc_df$entrez_id

tcell_pathways <- enrichPathway(gene=de,pvalueCutoff=0.05, readable=T)
tcell_pathways_df <- as.data.frame(tcell_pathways)

barplot(tcell_pathways, showCategory=10)

Network of genes shared between pathways

tcell_pathways_net <- pairwise_termsim(tcell_pathways,showCategory = 25)
emapplot(tcell_pathways_net, showCategory = 25,repel = T,cex_label_group = 0.5,layout = "kk")

Network of Pathways and genes (DisGeNET database)

edo <- enrichDGN(as.list(de))
de_fc_list <- sort(de_fc_list,decreasing = T)

## convert gene ID to Symbol
edox <- setReadable(edo, 'org.Hs.eg.db', 'ENTREZID')

## Network categorySize can be scaled by 'pvalue' or 'geneNum' 
p1_tcell <- cnetplot(edox, categorySize="pvalue", foldChange=de_fc_list)
## Warning in cnetplot.enrichResult(x, ...): Use 'color.params = list(foldChange = your_value)' instead of 'foldChange'.
##  The foldChange parameter will be removed in the next version.
## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.
## Heatmap
p2_tcell <- heatplot(edox, foldChange=de_fc_list, showCategory=25)

plot_grid(p1_tcell,p2_tcell,ncol = 2)